####################################################################################################
## Author: Molly Ariotti
## Project: Government Type and Public Spending in Africa
## Date: 2020-07-16
##
## Project Description: LSQ 2021 Replication
##
####################################################################################################

## load libraries
library(readstata13)
library(pcse)
library(plm)
library(clubSandwich)
library(mvtnorm)
library(lmtest)
library(multiwayvcov)
library(panelAR)
library(stargazer)
library(xtable)
library(lmboot)
library(clusterSEs)

## load data
data <- read.dta13("AfricanSpendingRR2.dta")
names(data)

## create new gdppc and population variables
data$ln_ny_gdp_pcap_kd <- data$ny_gdp_pcap_kd / 1000
data$pop1000s <- data$sp_pop_totl / 1000

## make lagged data
data.lag <- data
data.lag$year <- data$year + 1

## merge data and lagged data
data <- merge(data, data.lag, by=c("ccode", "year"), all=T, suffixes = c("","_lag"), all.x = TRUE, all.y = FALSE)

## define summary function
summary_function <- function(data_frame){
    
    means <- apply(data_frame,2,mean,na.rm=T)
    sds <- apply(data_frame,2,sd,na.rm=T)
    mins <- apply(data_frame,2,min,na.rm=T)
    maxs <- apply(data_frame,2,max,na.rm=T)
    
    return(data.frame(varnames=names(data_frame), mean=means, sd=sds, min=mins, max=maxs))
}



## Pres drops out of fe and two-way fe models
table(data$ccode, data$pres)


####################################################################################################
### DV: ne_con_govt_zs
####################################################################################################

## make empty lists to store model output
fit_pooling_psce <- fit_fe_psce <- fit_fe2way_psce <- list()
n_pooling_psce <- n_fe_psce <- n_fe2way_psce <- list()

for(i in 1:4){
## spending_min_wehner (key IV)
if(i==1)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## m_coalition (key IV)
if(i==2)FML <- "ne_con_govt_zs ~ m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## total_min_port (key iV)
if(i==3)FML <- "ne_con_govt_zs ~ total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## spending_min_wehner + m_coalition (key IV)
if(i==4)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"


## pooled OLS regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "pooling", effect = "individual", index=c("ccode", "year"))
fit_pooling_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_pooling_psce[[i]] <- nrow(fit$model)

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe_psce[[i]] <- nrow(fit$model)

## two-way FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "twoway", index=c("ccode", "year"))
fit_fe2way_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe2way_psce[[i]] <- nrow(fit$model)

}

## print and generate latex text for summary of data used in the model fitting

fit <- lm(ne_con_govt_zs ~ spending_min_wehner_lag + m_coalition_lag + total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + m_coalition + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs, data=data)

## TABLE 2
summary_function(fit$model)
xtable(summary_function(fit$model))

## verify sample size (it should be the same for all the fitted models below
nrow(fit$model)

## TABLE 4 
## print models for DV: spending_min_wehner
fit_pooling_psce[[1]]
n_pooling_psce[[1]]
fit_fe_psce[[1]]
n_fe_psce[[1]]
fit_fe2way_psce[[1]]
n_fe2way_psce[[1]]


## TABLE 3
## print models for DV: m_coalition
fit_pooling_psce[[2]]
n_pooling_psce[[2]]
fit_fe_psce[[2]]
n_fe_psce[[2]]
fit_fe2way_psce[[2]]
n_fe2way_psce[[2]]

## TABLE 4
## print models for DV: total_min_port
fit_pooling_psce[[3]]
n_pooling_psce[[3]]
fit_fe_psce[[3]]
n_fe_psce[[2]]
fit_fe2way_psce[[3]]
n_fe2way_psce[[3]]

## print models for DV: spending_min_wehner AND m_coalition
fit_pooling_psce[[4]]
n_pooling_psce[[4]]
fit_fe_psce[[4]]
n_fe_psce[[4]]
fit_fe2way_psce[[4]]
n_fe2way_psce[[4]]

## generate latex text
?stargazer  ## run this line to see the syntax for setting up the stargazer tables
#stargazer(fit_fe_psce[[1]], fit_fe_psce[[2]], fit_fe_psce[[3]])
#stargazer(fit_pooling_psce[[1]], fit_pooling_psce[[2]], fit_pooling_psce[[3]], fit_fe2way_psce[[1]], fit_fe2way_psce[[2]], fit_fe2way_psce[[3]])

## TABlE 4
## produce latex text for spending_min and total_min table
stargazer(fit_pooling_psce[[3]], fit_pooling_psce[[1]], fit_fe_psce[[3]], fit_fe_psce[[1]], fit_fe2way_psce[[3]], fit_fe2way_psce[[1]])

## TABLE 3
## produce latex text for m_coalition table
stargazer(fit_pooling_psce[[2]], fit_fe_psce[[2]], fit_fe2way_psce[[2]])


## end models in manuscript
##
##


####################################################################################################
## Appendices and other robustness tests
####################################################################################################

####################################################################################################
### DV: IMF_expense
####################################################################################################

## make empty lists to store model output
fit_pooling_psce <- fit_fe_psce <- fit_fe2way_psce <- list()

for(i in 1:3){
## spending_min_wehner (key IV)
if(i==1)FML <- "IMF_expense ~ spending_min_wehner_lag + govtsyear_lag + ENPP_lag  + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + IMF_expense_lag + spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## m_coalition (key IV)
if(i==2)FML <- "IMF_expense ~ m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + IMF_expense_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s+ ny_gdp_totl_rt_zs"

## total_min_port (key iV)
if(i==3)FML <- "IMF_expense ~ total_min_port_lag + govtsyear_lag + ENPP_lag  + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + IMF_expense_lag + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"



## pooled OLS regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "pooling", effect = "individual", index=c("ccode", "year"))
fit_pooling_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_pooling_psce[[i]] <- nrow(fit$model)

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe_psce[[i]] <- nrow(fit$model)

## two-way FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "twoway", index=c("ccode", "year"))
fit_fe2way_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe2way_psce[[i]] <- nrow(fit$model)

}

## print and generate latex text for summary of data used in the model fitting

fit <- lm(IMF_expense ~ spending_min_wehner_lag + m_coalition_lag + total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + IMF_expense_lag + spending_min_wehner + m_coalition + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs, data=data)

summary_function(fit$model)
xtable(summary_function(fit$model))
 
## verify sample size (it should be the same for all the fitted models below
nrow(fit$model)


## print models for DV: spending_min_wehner
fit_pooling_psce[[1]]
n_pooling_psce[[1]]
fit_fe_psce[[1]]
n_fe_psce[[1]]
fit_fe2way_psce[[1]]
n_fe2way_psce[[1]]


## print models for DV: m_coalition
fit_pooling_psce[[2]]
n_pooling_psce[[2]]
fit_fe_psce[[2]]
n_fe_psce[[2]]
fit_fe2way_psce[[2]]
n_fe2way_psce[[2]]

## print models for DV: total_min_port
fit_pooling_psce[[3]]
n_pooling_psce[[3]]
fit_fe_psce[[3]]
n_fe_psce[[2]]
fit_fe2way_psce[[3]]
n_fe2way_psce[[3]]

## generate latex text -- IMF appendix table
stargazer(fit_fe_psce[[2]], fit_fe_psce[[3]], fit_fe_psce[[1]])




## end second set of models
##
##

####################################################################################################
### DV: WEO_expenditure
####################################################################################################

## make empty lists to store model output
fit_pooling_psce <- fit_fe_psce <- fit_fe2way_psce <- list()

for(i in 1:3){
## spending_min_wehner (key IV)
if(i==1)FML <- "WEO_expenditure ~ spending_min_wehner_lag + govtsyear_lag + ENPP_lag  + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + WEO_expenditure_lag + spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s+ ny_gdp_totl_rt_zs"

## m_coalition (key IV)
if(i==2)FML <- "WEO_expenditure ~ m_coalition_lag + govtsyear_lag + ENPP_lag  + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + WEO_expenditure_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## total_min_port (key iV)
if(i==3)FML <- "WEO_expenditure ~ total_min_port_lag + govtsyear_lag + ENPP_lag  + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + WEO_expenditure_lag + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"


## pooled OLS regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "pooling", effect = "individual", index=c("ccode", "year"))
fit_pooling_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_pooling_psce[[i]] <- nrow(fit$model)

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe_psce[[i]] <- nrow(fit$model)

## two-way FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "twoway", index=c("ccode", "year"))
fit_fe2way_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe2way_psce[[i]] <- nrow(fit$model)

}

## print and generate latex text for summary of data used in the model fitting
summary_function(fit$model)
xtable(summary_function(fit$model))
 
## verify sample size (it should be the same for all the fitted models below
nrow(fit$model)


## print models for DV: spending_min_wehner
fit_pooling_psce[[1]]
n_pooling_psce[[1]]
fit_fe_psce[[1]]
n_fe_psce[[1]]
fit_fe2way_psce[[1]]
n_fe2way_psce[[1]]


## print models for DV: m_coalition
fit_pooling_psce[[2]]
n_pooling_psce[[2]]
fit_fe_psce[[2]]
n_fe_psce[[2]]
fit_fe2way_psce[[2]]
n_fe2way_psce[[2]]

## print models for DV: total_min_port
fit_pooling_psce[[3]]
n_pooling_psce[[3]]
fit_fe_psce[[3]]
n_fe_psce[[2]]
fit_fe2way_psce[[3]]
n_fe2way_psce[[3]]



## generate latex text -- IMF appendix table
stargazer(fit_fe_psce[[2]], fit_fe_psce[[3]], fit_fe_psce[[1]])


## end third set of models


####################################################################################################
### Marginal Effects for DV: ne_con_govt_zs
####################################################################################################

## m_coalition (key IV)
FML <- "ne_con_govt_zs ~ m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
fit_vcovBK <- vcovBK(fit, type="HC0", cluster=c("group", "time"))

#fit
#fit_fe_psce
#fit_vcovBK


COEFF <- fit$coeff
SIGMA <- fit_vcovBK[1:nrow(fit_vcovBK), 1:ncol(fit_vcovBK)]
betas <- rmvnorm(10000,mean=COEFF, sigma=SIGMA)


value <- 0:12
value_lag <- 0:12
mat_effect <- mat_q975 <- mat_q025 <- mat_q95 <- mat_q05 <- mat_q90 <- mat_q10 <- mat_q75 <- mat_q25 <- matrix(NA, nrow=length(value), ncol=length(value_lag))


for(i in 1:length(value)){
    for(j in 1:length(value_lag)){
        draws <- betas[,"m_coalition_lag"] * value_lag[j] + betas[,"m_coalition"] * value[i]
        mat_effect[i,j] <- mean(draws)
        mat_q025[i,j] <- quantile(draws, .025)
        mat_q975[i,j] <- quantile(draws, .975)
        mat_q05[i,j] <- quantile(draws, .05)
        mat_q95[i,j] <- quantile(draws, .95)
        mat_q10[i,j] <- quantile(draws, .10)
        mat_q90[i,j] <- quantile(draws, .90)
        mat_q25[i,j] <- quantile(draws, .25)
        mat_q75[i,j] <- quantile(draws, .75)
    }
}


values <- c(mat_effect[c(1,13),c(1,13)])[2:4]
values_975 <- c(mat_q975[c(1,13),c(1,13)])[2:4]
values_025 <- c(mat_q025[c(1,13),c(1,13)])[2:4]
values_95 <- c(mat_q95[c(1,13),c(1,13)])[2:4]
values_05 <- c(mat_q05[c(1,13),c(1,13)])[2:4]

barplot(values, beside=T, space=0, ylim=c(-2,2), col=grey(.9))
for(i in 1:3)lines(c(i-.5,i-.5), c(values_025[i], values_975[i]), lwd=1)
for(i in 1:3)lines(c(i-.5,i-.5), c(values_05[i], values_95[i]), lwd=2)



values <- c(mat_effect[1:13,1])
values_975 <- c(mat_q975[1:13,1])
values_025 <- c(mat_q025[1:13,1])
values_95 <- c(mat_q95[1:13,1])
values_05 <- c(mat_q05[1:13,1])

barplot(values, beside=T, space=0, ylim=c(-2,2), col=grey(.9))
for(i in 1:13)lines(c(i-.5,i-.5), c(values_025[i], values_975[i]), lwd=1)
for(i in 1:13)lines(c(i-.5,i-.5), c(values_05[i], values_95[i]), lwd=2)

values <- c(mat_effect[1:13,13])
values_975 <- c(mat_q975[1:13,13])
values_025 <- c(mat_q025[1:13,13])
values_95 <- c(mat_q95[1:13,13])
values_05 <- c(mat_q05[1:13,13])


barplot(values, beside=T, space=0, ylim=c(-2,2), col=grey(.9))
for(i in 1:13)lines(c(i-.5,i-.5), c(values_025[i], values_975[i]), lwd=1)
for(i in 1:13)lines(c(i-.5,i-.5), c(values_05[i], values_95[i]), lwd=2)


## descriptive data
table(data$m_coalition, data$m_coalition_lag)
table(data$m_coalition, data$m_coalition_lag) / sum(table(data$m_coalition, data$m_coalition_lag))




## make some binary data
data$binary_m_coalition_lag <- ifelse(data$m_coalition_lag>6,1,0)
data$binary_m_coalition <- ifelse(data$m_coalition>6,1,0)


FML <- "ne_con_govt_zs ~ binary_m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + binary_m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
fit_vcovBK <- vcovBK(fit, type="HC0", cluster=c("group", "time"))

fit_fe_psce



COEFF <- fit$coeff
SIGMA <- fit_vcovBK[1:nrow(fit_vcovBK), 1:ncol(fit_vcovBK)]
betas <- rmvnorm(10000,mean=COEFF, sigma=SIGMA)


value <- 0:1
value_lag <- 0:1
mat_effect <- mat_q975 <- mat_q025 <- mat_q95 <- mat_q05 <- mat_q90 <- mat_q10 <- mat_q75 <- mat_q25 <- matrix(NA, nrow=length(value), ncol=length(value_lag))


for(i in 1:length(value)){
    for(j in 1:length(value_lag)){
        draws <- betas[,"binary_m_coalition_lag"] * value_lag[j] + betas[,"binary_m_coalition"] * value[i]
        mat_effect[i,j] <- mean(draws)
        mat_q025[i,j] <- quantile(draws, .025)
        mat_q975[i,j] <- quantile(draws, .975)
        mat_q05[i,j] <- quantile(draws, .05)
        mat_q95[i,j] <- quantile(draws, .95)
        mat_q10[i,j] <- quantile(draws, .10)
        mat_q90[i,j] <- quantile(draws, .90)
        mat_q25[i,j] <- quantile(draws, .25)
        mat_q75[i,j] <- quantile(draws, .75)
    }
}

mat_effect


values <- c(mat_effect)[2:4]
values_975 <- c(mat_q975)[2:4]
values_025 <- c(mat_q025)[2:4]

barplot(values, beside=T, space=0, ylim=c(-2,2))
for(i in 1:3)lines(c(i-.5,i-.5), c(values_025[i], values_975[i]), lwd=1)





####################################################################################################
### DV: ne_con_govt_zs WITH NO LAGS (for appendix robustness check update)
####################################################################################################

## make empty lists to store model output
fit_pooling_psce <- fit_fe_psce <- fit_fe2way_psce <- list()
n_pooling_psce <- n_fe_psce <- n_fe2way_psce <- list()

for(i in 1:4){
## spending_min_wehner (key IV)
if(i==1)FML <- "ne_con_govt_zs ~ spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ny_gdp_totl_rt_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ne_con_govt_zs_lag"

## m_coalition (key IV)
if(i==2)FML <- "ne_con_govt_zs ~  m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ny_gdp_totl_rt_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ne_con_govt_zs_lag"

## total_min_port (key iV)
if(i==3)FML <- "ne_con_govt_zs ~  total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ny_gdp_totl_rt_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s  + ne_con_govt_zs_lag"

## spending_min_wehner + m_coalition (key IV)
if(i==4)FML <- "ne_con_govt_zs ~ + spending_min_wehner + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ny_gdp_totl_rt_zs  + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s  + ne_con_govt_zs_lag"


## pooled OLS regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "pooling", effect = "individual", index=c("ccode", "year"))
fit_pooling_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_pooling_psce[[i]] <- nrow(fit$model)

## FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "individual", index=c("ccode", "year"))
fit_fe_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe_psce[[i]] <- nrow(fit$model)

## two-way FE regression with SE clustered by group using PCSE from the plm library
fit <- plm(FML, data=data, model = "within", effect = "twoway", index=c("ccode", "year"))
fit_fe2way_psce[[i]] <- coeftest(fit, vcov=vcovBK(fit, type="HC0", cluster=c("group", "time")))
n_fe2way_psce[[i]] <- nrow(fit$model)

}

## print and generate latex text for summary of data used in the model fitting

fit <- lm(ne_con_govt_zs ~ spending_min_wehner_lag + m_coalition_lag + total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + m_coalition + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs, data=data)


summary_function(fit$model)
xtable(summary_function(fit$model))

## verify sample size (it should be the same for all the fitted models below
nrow(fit$model)


## print models for DV: spending_min_wehner
fit_pooling_psce[[1]]
n_pooling_psce[[1]]
fit_fe_psce[[1]]
n_fe_psce[[1]]
fit_fe2way_psce[[1]]
n_fe2way_psce[[1]]

## print models for DV: m_coalition
fit_pooling_psce[[2]]
n_pooling_psce[[2]]
fit_fe_psce[[2]]
n_fe_psce[[2]]
fit_fe2way_psce[[2]]
n_fe2way_psce[[2]]

## print models for DV: total_min_port
fit_pooling_psce[[3]]
n_pooling_psce[[3]]
fit_fe_psce[[3]]
n_fe_psce[[2]]
fit_fe2way_psce[[3]]
n_fe2way_psce[[3]]

## print models for DV: spending_min_wehner AND m_coalition
fit_pooling_psce[[4]]
n_pooling_psce[[4]]
fit_fe_psce[[4]]
n_fe_psce[[4]]
fit_fe2way_psce[[4]]
n_fe2way_psce[[4]]

## generate latex text for present value only models in the appendix
stargazer(fit_fe_psce[[2]], fit_fe_psce[[3]], fit_fe_psce[[1]])




####################################################################################################
### DV: ne_con_govt_zs jackknife standard errors for linear model only
####################################################################################################

fit_jackknife <- list()
for(i in 1:3){
    ## spending_min_wehner (key IV)
    if(i==1)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## m_coalition (key IV)
    if(i==2)FML <- "ne_con_govt_zs ~ m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## total_min_port (key iV)
    if(i==3)FML <- "ne_con_govt_zs ~ total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## spending_min_wehner + m_coalition (key IV)
    if(i==4)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## generate initial linear model fit
    fit <- lm(FML, data=data)
    model_data <- fit$model
    lm_coeff <- fit$coefficients
    
    ## lapply jackknife to all data subsets
    jackknife <- lapply(1:nrow(model_data), function(j){
        fit <- lm(FML, data=model_data[-j,])
        return(as.numeric(coef(fit)))
    })
    mat <- do.call("rbind", jackknife)
    
    ## calculate sample size
    n <- nrow(model_data)
    
    ## calculate jackknife statistics
    jackknife_coefficients <- apply(mat, 2, mean)
    jackknife_se <- sqrt((n-1)*(apply(mat, 2, var))) ## see Efron and Hasti. 2016. eq 10.6, pg 156
    jackknife_tstat <- jackknife_coefficients/jackknife_se
    jackknife_pvalue <- 2*pt(-abs(jackknife_tstat),df=n-1)
    
    ## dataframe with model evaluation statistics
    fit_jackknife[[i]] <- data.frame(
    #varnames=names(fit$coefficients),
    lm_coef=fit$coefficients,
    lm_se=summary(fit)$coefficients[,2],
    lm_tstat=summary(fit)$coefficients[,3],
    lm_pvalue=summary(fit)$coefficients[,4],
    jn_coef=jackknife_coefficients,
    jn_se=jackknife_se,
    jn_tstat=jackknife_tstat,
    jn_pvalue=jackknife_pvalue)
    
}

fit_jackknife



####################################################################################################
### DV: ne_con_govt_zs boot standard CIs for all 3 panel model specifications 
####################################################################################################


## make empty lists to store model output
fit_pooling_psce_boots <- fit_fe_psce_boots <- fit_fe2way_psce_boots <- list()

pdata <- pdata.frame(data, index=c("ccode", "year"))

for(i in 1:3){
    ## spending_min_wehner (key IV)
    if(i==1)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## m_coalition (key IV)
    if(i==2)FML <- "ne_con_govt_zs ~ m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## total_min_port (key iV)
    if(i==3)FML <- "ne_con_govt_zs ~ total_min_port_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + total_min_port + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    ## spending_min_wehner + m_coalition (key IV)
    if(i==4)FML <- "ne_con_govt_zs ~ spending_min_wehner_lag + m_coalition_lag + govtsyear_lag + ENPP_lag + dt_oda_alld_gd_zs_lag + ln_ny_gdp_pcap_kd_lag + sl_uem_totl_zs_lag + ne_trd_gnfs_zs_lag + sp_pop_dpnd_lag + pop1000s_lag + ny_gdp_totl_rt_zs_lag + ne_con_govt_zs_lag + spending_min_wehner + m_coalition + govtsyear + ENPP + pres + PR + dt_oda_alld_gd_zs + ln_ny_gdp_pcap_kd + sl_uem_totl_zs + ne_trd_gnfs_zs + sp_pop_dpnd + pop1000s + ny_gdp_totl_rt_zs"
    
    
    
    pdata <- pdata.frame(lm(paste(FML,"+ccode+year"), data=data)$model, index=c("ccode", "year"))
    dim(pdata)
    
    ## pooled OLS regression with SE clustered by group using PCSE from the plm library
    fit <- plm(FML, data=pdata, model = "pooling", effect = "individual", index=c("ccode", "year"))
    fit_pooling_psce_boots[[i]] <- cluster.wild.plm(mod=fit, dat=pdata, cluster="group", ci.level = 0.95, boot.reps = 1000, report = TRUE, prog.bar = TRUE)
    
    ## FE regression with SE clustered by group using PCSE from the plm library
    fit <- plm(FML, data=pdata, model = "within", effect = "individual", index=c("ccode", "year"))
    fit_fe_psce_boots[[i]] <- cluster.wild.plm(mod=fit, dat=pdata, cluster="group", ci.level = 0.95, boot.reps = 1000, report = TRUE, prog.bar = TRUE)
    
    ## two-way FE regression with SE clustered by group using PCSE from the plm library
    fit <- plm(FML, data=pdata, model = "within", effect = "twoway", index=c("ccode", "year"))
    fit_fe2way_psce_boots[[i]] <- cluster.wild.plm(mod=fit, dat=pdata, cluster="group", ci.level = 0.95, boot.reps = 1000, report = TRUE, prog.bar = TRUE)
    
}

fit_pooling_psce_boots[[1]]
fit_fe_psce_boots[[1]]
fit_fe2way_psce_boots[[1]]

fit_pooling_psce_boots[[2]]
fit_fe_psce_boots[[2]]
fit_fe2way_psce_boots[[2]]

fit_pooling_psce_boots[[3]]
fit_fe_psce_boots[[3]]
fit_fe2way_psce_boots[[3]]




